CREATE TABLE IF NOT EXISTS employees12 (
name STRING COMMENT 'Employee name',
salary FLOAT COMMENT 'Employee salary',
FexTax FLOAT,
StateTax FLOAT,
Insurance FLOAT,
address STRING)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n';
Mary Smith,80000.0,.2,.05,.1,100 Ontario St. Chicago IL 60601
Todd Jones,70000.0,.15,.03,.1,108 Oak Park Chicago IL 60700
Bill King,60000.0,.15,.03,.1,10 Ontario St. Chicago IL 60700
Sam Kim,110000.0,.2,.05,.1,101 Uptown Rd. Ithaca NY 14850
Maria Booth,60000.0,.15,.03,.1,46 South Gate Dr. State College PA 16801
Joe Howe,75000.0,.2,.5,.1,24 Garden Ave. Ithaca NY 14853
LOAD DATA LOCAL INPATH '/empdata-comma.txt'
OVERWRITE INTO TABLE employees12;
The LOCAL keyword:
hive> select * from employees12;
OK
John Smith 100000.0 0.2 0.05 0.1 1 Michigan Ave. Chicago IL 60600
Mary Smith 80000.0 0.2 0.05 0.1 100 Ontario St. Chicago IL 60601
Todd Jones 70000.0 0.15 0.03 0.1 108 Oak Park Chicago IL 60700
Bill King 60000.0 0.15 0.03 0.1 10 Ontario St. Chicago IL 60700
Sam Kim 110000.0 0.2 0.05 0.1 101 Uptown Rd. Ithaca NY 14850
Maria Booth 60000.0 0.15 0.03 0.1 46 South Gate Dr. State College PA 16801
Joe Howe 75000.0 0.2 0.5 0.1 24 Garden Ave. Ithaca NY 14853
CREATE TABLE IF NOT EXISTS employees11 (
name STRING COMMENT 'Employee name',
salary FLOAT COMMENT 'Employee salary',
FedTax FLOAT,
StateTax FLOAT,
Insurance FLOAT,
address STRING)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n';
Todd Jones 70000.0 .15 .03 .1 108 Oak Park Chicago IL 60700
Bill King 60000.0 .15 .03 .1 10 Ontario St. Chicago IL 60700
Sam Kim 110000.0 .2 .05 .1 101 Uptown Rd. Ithaca NY 14850
Maria Booth 60000.0 .15 .03 .1 46 South Gate Dr. State College PA 16801
Joe Howe 75000.0 .2 .5 .1 24 Garden Ave. Ithaca NY 14853
LOAD DATA LOCAL INPATH '/empdata-tab.txt'
OVERWRITE INTO TABLE employees11;
Time taken: 2.4 seconds
hive> select * from employees11;
OK
John Smith 100000.0 0.2 0.05 0.1 1 Michigan Ave. Chicago IL 60600
Mary Smith 80000.0 0.2 0.05 0.1 100 Ontario St. Chicago IL 60601
Todd Jones 70000.0 0.15 0.03 0.1 108 Oak Park Chicago IL 60700
Bill King 60000.0 0.15 0.03 0.1 10 Ontario St. Chicago IL 60700
Sam Kim 110000.0 0.2 0.05 0.1 101 Uptown Rd. Ithaca NY 14850
Maria Booth 60000.0 0.15 0.03 0.1 46 South Gate Dr. State College PA 16801
Joe Howe 75000.0 0.2 0.5 0.1 24 Garden Ave. Ithaca NY 14853
CREATE TABLE IF NOT EXISTS employees20 (
name STRING COMMENT 'Employee name',
salary FLOAT COMMENT 'Employee salary',
subordinates ARRAY<STRING> COMMENT 'Names of subordinates',
deductions MAP<STRING, FLOAT>
COMMENT 'Keys are deductions names, values are percentages',
address STRUCT<street:STRING, city:STRING, state:STRING, ZIP:INT>
COMMENT 'Home address')
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '^'
COLLECTION ITEMS TERMINATED BY '#'
MAP KEYS TERMINATED BY '@'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
John Smith^100000.0^Mary Smith#David Wells^Federal Taxes@0.2#State Taxes@0.05#Insurance@0.1^1 Michigan Ave.#Chicago#IL#60600
Mary Smith^80000.0^Bill King^Federal Taxes@0.2#State Taxes@0.05#Insurance@0.1^100 Ontario St.#Chicago#IL#60601
Jason Yang^150000.0^Diana Foster#Kelli Doe#Kevin Hoover^Federal Taxes@0.2#State Taxes@0.05#Insurance@0.1^101 Uptown Rd.#Ithaca#NY#14850
Ted Wang^120000.0^Diana Johnson#Kevin Li#Dan Jones^Federal Taxes@0.2#State Taxes@0.05#Insurance@0.1^21 Farm Rd.#Ithaca#NY#14850
Mike McPheron^150000.0^Marvin Xu#Elise Lee^Federal Taxes@0.02#State Taxes@0.05#Insurance@0.1^12 Bush Ave.#Ithaca#NY#14850
LOAD DATA LOCAL INPATH '/empdata.txt'
OVERWRITE INTO TABLE employees20;
hive> select * from employees20;
OK
John Smith 100000.0 ["Mary Smith","David Wells"] {"Federal Taxes":0.2,"State Taxes":0.05,"Insurance":0.1} {"street":"1 Michigan Ave.","city":"Chicago","state":"IL","zip":60600}
Mary Smith 80000.0 ["Bill King"] {"Federal Taxes":0.2,"State Taxes":0.05,"Insurance":0.1} {"street":"100 Ontario St.","city":"Chicago","state":"IL","zip":60601}
Jason Yang 150000.0 ["Diana Foster","Kelli Doe","Kevin Hoover"] {"Federal Taxes":0.2,"State Taxes":0.05,"Insurance":0.1} {"street":"101 Uptown Rd.","city":"Ithaca","state":"NY","zip":14850}
Ted Wang 120000.0 ["Diana Johnson","Kevin Li","Dan Jones"] {"Federal Taxes":0.2,"State Taxes":0.05,"Insurance":0.1} {"street":"21 Farm Rd.","city":"Ithaca","state":"NY","zip":14850}
Mike McPheron 150000.0 ["Marvin Xu","Elise Lee"] {"Federal Taxes":0.02,"State Taxes":0.05,"Insurance":0.1} {"street":"12 Bush Ave.","city":"Ithaca","state":"NY","zip":14850}
Time taken: 0.58 seconds, Fetched: 5 row(s)
CREATE TABLE IF NOT EXISTS employees23 (
name STRING COMMENT 'Employee name',
salary FLOAT COMMENT 'Employee salary',
subordinates ARRAY<STRING> COMMENT 'Names of subordinates',
deductions MAP<STRING, FLOAT>
COMMENT 'Keys are deductions names, values are percentages',
address STRUCT<street:STRING, city:STRING, state:STRING, ZIP:INT>
COMMENT 'Home address')
PARTITIONED BY (country STRING, state STRING)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '^'
COLLECTION ITEMS TERMINATED BY '#'
MAP KEYS TERMINATED BY '@'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH '/empdata-us-il.txt'
OVERWRITE INTO TABLE employees23
PARTITION (country = 'US', state = 'IL');
Select * from employees23;
LOAD DATA LOCAL INPATH '/empdata-us-ny.txt'
OVERWRITE INTO TABLE employees23
PARTITION (country = 'US', state = 'NY');
LOAD DATA LOCAL INPATH '/empdata-us-ca.txt'
OVERWRITE INTO TABLE employees23
PARTITION (country = 'US', state = 'CA');
See the whole table with three partitions:
Select * from employees23;
Select * from employees23 where state="NY";
Select * from employees23 where address.state="NY";
Select name, salary, address.city, address.state
from employees23
where salary > 120000;
Select name, salary, address.city, address.state
from employees23
where salary > 120000 and (state = 'CA' or state = 'IL');
Those of the employees23 table:
.../employees23/country=US/state=CA
.../employees23/country=US/state=IL
.../employees23/country=US/state=NY
The location of the loaded file and its contents:
CREATE TABLE inserttbl like employees20;
INSERT OVERWRITE TABLE inserttbl
SELECT * FROM employees20
WHERE salary > 100000.0;
SELECT * FROM inserttbl;
OVERWRITE, any previous contents of the partition (or whole table if not partitioned) are replacedINSERT OVERWRITE TABLE employees
PARTITION (country = 'US', state = 'OR')
SELECT * FROM staged_employees se
WHERE se.cnty = 'US' AND se.st = 'OR';
OVERWRITE or with INTO, Hive appends the data rather than replaces itINSERT INTO TABLE employees
PARTITION (country = 'US', state = 'OR')
SELECT * FROM staged_employees se
WHERE se.cnty = 'US' AND se.st = 'OR';
FROM staged_employees se
INSERT OVERWRITE TABLE employees
PARTITION (country = 'US', state = 'OR')
SELECT * WHERE se.cnty = 'US' AND se.st = 'OR'
INSERT OVERWRITE TABLE employees
PARTITION (country = 'US', state = 'CA')
SELECT * WHERE se.cnty = 'US' AND se.st = 'CA'
INSERT OVERWRITE TABLE employees
PARTITION (country = 'US', state = 'IL')
SELECT * WHERE se.cnty = 'US' AND se.st = 'IL';
Hive’s dynamic partition can infer the partitions to create based on query parameters:
INSERT OVERWRITE TABLE employees
PARTITION (country, state)
SELECT ..., se.cnty, se.st
FROM staged_employees se;
In this example, Hive determines the values of the partition keys, country and state, from the last two columns in the SELECT clause
Different names in staged_employees emphasize that the relationship between the source column values and the output partition values is by position only and not by matching on names
Static value for the country (US) and a dynamic value for the state. The static partition keys must come before the dynamic partition keys
INSERT OVERWRITE TABLE employees
PARTITION (country = 'US', state)
SELECT ..., se.cnty, se.st
FROM staged_employees se
WHERE se.cnty = 'US';
| Name | Default | Description |
|---|---|---|
hive.exec.dynamic.partition |
false |
Set to true to enable dynamic partitioning |
hive.exec.dynamic.partition.mode |
strict |
Set to nonstrict to enable all partitions to be determined dynamically |
hive.exec.max.dynamic.partitions.pernode |
100 |
The maximum number of dynamic partitions that can be created by each mapper or reducer. Raises a fatal error if one mapper or reducer attempts to create more than the threshold |
hive.exec.max.dynamic.partitions |
+1000 |
The total number of dynamic partitions that can be created by one statement with dynamic partitioning. Raises a fatal error if the limit is exceeded |
hive.exec.max.created.files |
100000 |
The maximum total number of files that can be created globally. A Hadoop counter is used to track the number of files created. Raises a fatal error if the limit is exceeded |
hive> set hive.exec.dynamic.partition=true;
hive> set hive.exec.dynamic.partition.mode=nonstrict;
hive> set hive.exec.max.dynamic.partitions.pernode=1000;
hive> INSERT OVERWRITE TABLE employees
> PARTITION (country, state)
> SELECT ..., se.cty, se.st
> FROM staged_employees se;
CREATE TABLE ca_employees
AS SELECT name, salary, address
FROM employees
WHERE state = 'CA';
INSERT OVERWRITE LOCAL DIRECTORY '/tmp/ca_employees'
SELECT name, salary, address
FROM employees
WHERE state = 'CA';
Independent of how the data is actually stored in the source table, it is written to files with all fields serialized as strings
hive> ! ls /tmp/ca_employees;
000000_0
hive> ! cat /tmp/ca_employees/000000_0
John Doe100000.0201 San Antonio CircleMountain ViewCA94040
Mary Smith80000.01 Infinity LoopCupertinoCA95014
...